In [1]:
import pandas as pd, numpy as np
import kendo_romania

Read data


In [2]:
# One independent, initially empty {competition: [matches]} dict per season 1993..2018.
matches=dict((year,{}) for year in range(1993,2019))
Import data

2018

CR


In [3]:
# 2018 CR: read the 'List of matches' sheet of the public workbook.
# column_keys maps each match field to a column index; how the index and the
# trailing positional 3 are interpreted is decided by
# kendo_romania.get_matches_from_list (not visible in this notebook).
filename='rawdata/2018/CR/CR25 - Public.xlsx'
sheetname='List of matches'
column_keys=dict(
    match_type=2,
    aka=dict(name=5,hansoku=6,point1=7,point2=8,point3=9),
    shiro=dict(name=15,hansoku=14,point1=11,point2=12,point3=13),
    outcome=10,
    shinpan=dict(fukushin1=16,shushin=17,fukushin2=18),
)
matches[2018]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3)

SL


In [5]:
filename='rawdata/2018/SL/Prezenta SL_WKC17.xlsx'
sheetname=['F','M']
matches[2018]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,5)

CN


In [17]:
# 2018 CN: read the 'Shiai' sheet; same field layout as the 2018 CR workbook
# except for the match_type column, and the loader is called with shift=-1.
filename='rawdata/2018/CN/Event management CN25.xlsx'
sheetname='Shiai'
column_keys=dict(
    match_type=3,
    aka=dict(name=5,hansoku=6,point1=7,point2=8,point3=9),
    shiro=dict(name=15,hansoku=14,point1=11,point2=12,point3=13),
    outcome=10,
    shinpan=dict(fukushin1=16,shushin=17,fukushin2=18),
)
shift=-1
matches[2018]['CN']=kendo_romania.get_matches_from_list(
    filename,sheetname,column_keys,7,shift=shift)

In [18]:
matches[2018]['CN'][-13]


Out[18]:
{'match_type': 'rawdata/2018/CN/Event management CN25.xlsx#ES - F',
 'aka': {'name': 'Chirea V. (CRK)',
  'hansoku': '▲',
  'point1': 'M',
  'point2': nan,
  'point3': nan},
 'shiro': {'name': 'Csala D. (BTK)',
  'hansoku': nan,
  'point1': nan,
  'point2': nan,
  'point3': nan},
 'outcome': 1,
 'shinpan': {'fukushin1': 'Mandia F.',
  'shushin': 'Arabadjiyski A.',
  'fukushin2': 'Crăciunel I.'}}

2017

CN


In [19]:
categories=['Individual masculin','Echipe']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=0
matches[2017]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [20]:
categories=['Individual juniori mici','Individual juniori mari','Individual feminin']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CN']=matches[2017]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [21]:
categories=['Individual masculin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2017]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [22]:
categories=['Individual juniori','Individual veterani','Individual feminin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [23]:
categories=['Echipe']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=0
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

SL


In [24]:
filename='rawdata/2017/SL/Prezenta.xlsx'
sheetname=['F','M','J']
matches[2017]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

2016

SL


In [25]:
filename='rawdata/2016/SL/Event management - stagiul 4.xlsx'
sheetname=['F','M']
matches[2016]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [26]:
sheetname=['J']
matches[2016]['SL']=matches[2016]['SL']+\
        kendo_romania.get_matches_from_table(filename,sheetname,5)

CN


In [27]:
categories=['Individual masculin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2016]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [28]:
categories=['Individual feminin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [29]:
categories=['Echipe','Male team']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=0
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [30]:
categories=['Junior 1 individual','Junior 2 individual']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [31]:
filename='rawdata/2016/CR/Event management_CR23.2016.xlsx'
sheetname=['IF_m','IJ_m','IM_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2016]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
sheetname=['EJ_m','ES_m']
matches[2016]['CR']=matches[2016]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

2015

SL


In [ ]:
filename='rawdata/2015/SL/Event management - stagiul 5.xlsx'
sheetname=['SF_s','SM_s']
matches[2015]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CN


In [ ]:
filename='rawdata/2015/CN/Event management_CN22.2015.xlsx'
sheetname=['IF_m','IJ2_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
sheetname='E_m'
matches[2015]['CN']=matches[2015]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

CR


In [ ]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IF_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ1_s']
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=7,shift=1,nrows=9)

In [ ]:
# 2015 CR, sheet 'IJ2_s': two reads of 8 rows each that differ only in the
# number of rows skipped (8, then 16); results are appended in that order.
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ2_s']
for table_start in (8,16):
    matches[2015]['CR']=matches[2015]['CR']+kendo_romania.get_matches_from_table(
        filename,sheetname,skiprows=table_start,shift=12,nrows=8)

In [ ]:
# 2015 CR, sheet 'IM_s': the same column layout is read twice, with shift 0
# and then shift 10. `shift` is the loop variable so it is left at 10, exactly
# as after the original sequential assignments.
sheetname=['IM_s']
column_keys=dict(
    match_type=19,
    aka=dict(name=20,point1=21),
    shiro=dict(name=24,point1=23),
    outcome=22,
)
for shift in (0,10):
    matches[2015]['CR']=matches[2015]['CR']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,7,shift=shift)

2014

SL


In [ ]:
filename='rawdata/2014/SL/Lista de participanti 6.xlsx'
sheetname=['SF_s','SM_s','J_s']
matches[2014]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CR


In [ ]:
filename='rawdata/2014/CR/Event management_CR21.2014.xlsx'
sheetname=['IC-10_m','IC_m','IJ_m','IS_m','IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=8
matches[2014]['CR']=matches[2014]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

CN


In [ ]:
filename='rawdata/2014/CN/Event management_CN21.2014 - v2.xlsx'
sheetname=['IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
# 2014 CN, sheet 'IM_s': read the same layout at shift 19 and then shift 29;
# `shift` ends at 29 as in the original cell.
sheetname=['IM_s']
column_keys=dict(
    match_type=0,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
for shift in (19,29):
    matches[2014]['CN']=matches[2014]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,7,shift=shift)

In [ ]:
sheetname=['IJ1_s']
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,7,shift=1,nrows=10)

In [ ]:
# 2014 CN, sheet 'IJ2_s': three 6-row reads whose third positional argument
# is 8, 14 and 20 respectively; everything else is identical.
sheetname=['IJ2_s']
for table_start in (8,14,20):
    matches[2014]['CN']=matches[2014]['CN']+kendo_romania.get_matches_from_table(
        filename,sheetname,table_start,shift=12,nrows=6)

2013

CN


In [ ]:
filename='rawdata/2013/CN/Event management_CN2013.xlsx'
sheetname=['IS_m','IF_m','IC_m','IJ_m','E_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [ ]:
filename='rawdata/2013/CR/Event management_CR2013.xlsx'
sheetname=['IF_meciuri','IJ_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

SL


In [ ]:
filename='rawdata/2013/SL/Event management.xlsx'
sheetname=['E_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['SL']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
# 2013 SL: append the two bracket sheets, each with its own row count.
# `sheetname` is the loop variable so it ends as ['Schema juniori'], matching
# the state the original cell leaves behind.
for sheetname,row_count in ((['Schema feminin'],14),(['Schema juniori'],12)):
    matches[2013]['SL']=matches[2013]['SL']+kendo_romania.get_matches_from_table(
        filename,sheetname,2,nrows=row_count)

2012

CN


In [ ]:
filename='rawdata/2012/CN/Event management CN2012.xlsx'
sheetname=['E_meciuri','IJ_meciuri','IF_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2012]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [ ]:
filename='rawdata/2012/CR/2012.05.05-06 - CR - Cluj.xlsx'
sheetname=['IC']
matches[2012]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=4)

In [ ]:
# 2012 CR, sheet 'IJ': five 3-row reads that differ only in the third
# positional argument (14, 19, 24, 30, 35); appended in that order.
sheetname=['IJ']
for table_start in (14,19,24,30,35):
    matches[2012]['CR']=matches[2012]['CR']+kendo_romania.get_matches_from_table_oneliner(
        filename,sheetname,table_start,shift=1,nrows=3)

In [ ]:
sheetname=['IF']
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,22,shift=shift)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
# 2012 CR, sheet 'ES': the same layout is read at shift -1, 4 and 9;
# `shift` is left at 9 as in the original cell.
sheetname=['ES']
column_keys=dict(
    match_type=20,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
for shift in (-1,4,9):
    matches[2012]['CR']=matches[2012]['CR']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,4,shift=shift)

2011

CN


In [ ]:
filename='rawdata/2011/CN/2011.11.26-27 - CN - Bucuresti_print.xlsx'
sheetname=['IJ']
matches[2011]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1)

In [ ]:
sheetname=['IF']
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,28,shift=shift)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
# 2011 CN, sheet 'E': the same layout is read at shift 17, 23 and 29;
# `shift` is left at 29 as in the original cell.
sheetname=['E']
column_keys=dict(
    match_type=0,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
for shift in (17,23,29):
    matches[2011]['CN']=matches[2011]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,5,shift=shift)

CR


In [ ]:
# 2011 CR, sheet 'ES': the first read (shift -1) creates the season's CR list,
# the reads at shift 5 and 11 are appended to it. `shift` ends at 11.
filename='rawdata/2011/CR/2011.04.16-17 - CR - Miercurea Ciuc.xlsx'
sheetname=['ES']
column_keys=dict(
    match_type=6,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
shift=-1
matches[2011]['CR']=kendo_romania.get_matches_from_list(
    filename,sheetname,column_keys,7,shift=shift)
for shift in (5,11):
    matches[2011]['CR']=matches[2011]['CR']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,7,shift=shift)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
sheetname=['IF']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [ ]:
# 2011 CR, sheet 'IJ': three reads given as (third positional argument, nrows)
# pairs; all use shift=1 and are appended in this order.
sheetname=['IJ']
for table_start,row_count in ((16,3),(21,4),(27,3)):
    matches[2011]['CR']=matches[2011]['CR']+kendo_romania.get_matches_from_table_oneliner(
        filename,sheetname,table_start,shift=1,nrows=row_count)

In [ ]:
sheetname=['IC']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [ ]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

2010

CR


In [ ]:
filename='rawdata/2010/CR/2010.03.27-28 - CR - Budeasa.xlsx'
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2010]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
sheetname=['IF']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [ ]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

In [ ]:
sheetname=['IJ']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,16,shift=1,nrows=3)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,27,shift=1,nrows=3)

In [ ]:
sheetname=['IC']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

CN


In [ ]:
filename='rawdata/2010/CN/2010.11.27-28 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2010]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=5)

In [ ]:
sheetname=['IC']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [ ]:
sheetname=['IF']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=12
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [ ]:
# 2010 CN, sheet 'E': the same layout is read at shift -1, 5 and 11;
# `shift` is left at 11 as in the original cell.
sheetname=['E']
column_keys=dict(
    match_type=15,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
for shift in (-1,5,11):
    matches[2010]['CN']=matches[2010]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,5,shift=shift)

2009

CN


In [ ]:
filename='rawdata/2009/CN/2009.11.28-29 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2009]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [ ]:
sheetname=['IF']
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
# 2009 CN, sheet 'ES': the same layout is read at shift -1, 5 and 11;
# `shift` is left at 11 as in the original cell.
# Note that match_type and aka.name both point at column index 1 here,
# exactly as in the original configuration.
sheetname=['ES']
column_keys=dict(
    match_type=1,
    aka=dict(name=1,point1=2),
    shiro=dict(name=5,point1=4),
    outcome=3,
)
for shift in (-1,5,11):
    matches[2009]['CN']=matches[2009]['CN']+kendo_romania.get_matches_from_list(
        filename,sheetname,column_keys,7,shift=shift)

CR


In [ ]:
filename='rawdata/2009/CR/2009.04.04 - CR - Budeasa - print.xlsx'
sheetname=['IJ']
matches[2009]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [ ]:
sheetname=['IF']
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=6)

In [ ]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [ ]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

Clean up points, matches, player names


In [ ]:
# Phase keywords searched, in this order, in the part of the label after '#'.
# The first group with a hit decides the phase.
_PHASE_RULES=(
    (('pool','Pool'),'Pool'),
    (('prel','Prel','layoff'),'Prelim.'),
    (('- F','F -','Final','SF','QF'),'Finals'),
)
# A label that is exactly one of these single letters names a pool.
_POOL_LETTERS=frozenset('ABCDEFGHIJKLMNOPQRST')

# Sheet codes searched, in this order, in the part of the label after '#'.
# The first hit wins.
_SHEET_CODE_KINDS=(
    ('IS',"Senior's Individual"),
    ('IF',"Women's Individual"),
    ('IM',"Men's Individual"),
    ('IC',"Children's Individual"),
    ('IJ',"Junior's Individual"),
    ('EJ',"Junior's Team"),
    ('EF',"Women's Team"),
    ('ES',"Senior's Team"),
)
# Substrings of the lower-cased workbook path. Every rule is tried and a later
# hit overrides an earlier one (and overrides the sheet-code result).
_FILE_NAME_KINDS=(
    ('individual masculin.',"Men's Individual"),
    ('echipe.',"Mixed Team"),
    ('individual juniori',"Junior's Individual"),
    ('individual feminin',"Women's Individual"),
    ('individual veterani',"Senior's Individual"),
    ('male team',"Men's Team"),
    ('junior 1 individual',"Junior's Individual"),
    ('junior 2 individual',"Junior's Individual"),
)
# Labels without '#' are compared for equality against these names.
_PLAIN_LABEL_KINDS={
    'F':"Women's Individual",
    'M':"Men's Individual",
    'J':"Junior's Individual",
    'SF_s':"Women's Individual",
    'SM_s':"Men's Individual",
    'J_s':"Junior's Individual",
}

def match_cleaner(year,match):
    """Translate a raw match-type label into ``(category, teams, phase)``.

    Parameters
    ----------
    year : int
        Season of the match. Seasons before 2014, and 2018, report the
        'Senior' category as 'Men'.
    match : str
        Either ``'<workbook path>#<sheet / stage text>'`` or a bare label such
        as ``'F'``, ``'M'``, ``'J'``, ``'SF_s'``, ``'SM_s'``, ``'J_s'``.
        Anything that is not a string (e.g. NaN) is reported as unknown.

    Returns
    -------
    tuple of str
        ``category`` (e.g. 'Men', 'Women', 'Junior'), ``teams``
        ('Individual' or 'Team') and ``phase`` ('Pool', 'Prelim.', 'Finals').
        Each element is 'Unknown' when it cannot be determined.
    """
    kind,phase='Unknown','Unknown'
    # A non-string label cannot be searched; report it as unknown instead of
    # failing on the substring tests below.
    if not isinstance(match,str):
        return 'Unknown','Unknown','Unknown'

    if '#' in match:
        parts=match.split('#')
        stage0=parts[0].lower()
        stage1=parts[1]

        # Phase: one ordered chain, so a 'pool' hit can no longer be
        # overwritten by a later keyword such as '- F'.
        for needles,label in _PHASE_RULES:
            if any(needle in stage1 for needle in needles):
                phase=label
                break
        else:
            if stage1 in _POOL_LETTERS:
                phase='Pool'

        # Kind from the sheet code ...
        for code,label in _SHEET_CODE_KINDS:
            if code in stage1:
                kind=label
                break
        # ... then from the workbook name, which takes precedence.
        for needle,label in _FILE_NAME_KINDS:
            if needle in stage0:
                kind=label
    else:
        kind=_PLAIN_LABEL_KINDS.get(match,'Unknown')

    if kind=='Unknown':
        category='Unknown'
        teams='Unknown'
    else:
        words=kind.split(' ')
        # Drops the trailing "'s". NOTE(review): for "Mixed Team" this yields
        # 'Mix'; kept unchanged because consumers of that value are not
        # visible here - confirm before changing it.
        category=words[0][:-2]
        teams=words[1]
    if year<2014 or year==2018:
        category=category.replace('Senior','Men')
    return category,teams,phase

Load names


In [ ]:
members=pd.read_csv('data/members_base.csv')

In [ ]:
members.head()

In [ ]:
# Maps a name spelling found in the raw sheets to the spelling to use instead.
# How the lookup is applied (exact key match, etc.) is decided by code that is
# not visible in this part of the notebook.
# The original literal listed 'Crăciun (Tamang) Sujata' twice with the same
# value; the redundant entry is removed, the resulting dict is unchanged.
name_exceptions={'Atanasovski':'Atanasovski A. (MAC)',
                 'Dobrovicescu (SON)':'Dobrovicescu T. (SON)',
                 'Ianăș':'Ianăș F.',
                 'Crăciun (Tamang) Sujata':'Crăciun S.',
                 'Dinu (Ioniță) Claudia-Andreea':'Dinu A.',
                 'Arabadjiyski': 'Arabadjiyski A.',
                 'Mandia':'Mandia F.',
                 'Stanev':'Stanev A.',
                 'Mochalov':'Mochalov O.',
                 'Sozzi':'Sozzi A.',
                 'Crăciunel':'Crăciunel I.',
                 'Craciunel':'Crăciunel I.',
                 'Sagaev':'Sagaev L.',
                 'Buzás':'Búzás C.',
                 'Csala':'Csala D.',
                 'Dimitrov':'Dimitrov M.',
                 'Józsa':'Józsa L.',
                 'Creangă':'Creangă A.',
                 'Duțescu':'Duțescu M.',
                 'Furtună':'Furtună G.',
                 'Gârbea':'Gârbea I.',
                 'Stupu':'Stupu I.',
                 'Mahika-Voiconi':'Mahika-Voiconi S.',
                 'Mahika':'Mahika-Voiconi S.',
                 'Stanciu':'Stanciu F.',
                 'Vrânceanu':'Vrânceanu R.',
                 'Wolfs':'Wolfs J.',
                 'Ducarme':'Ducarme A.',
                 'Sbârcea':'Sbârcea B.',
                 'Mocian':'Mocian A.',
                 'Hatvani':'Hatvani L.',
                 'Dusan':'Dusan N.',
                 'Borota':'Borota V.',
                 'Tsushima':'Tsushima K.',
                 'Tráser':'Tráser T.',
                 'Colțea':'Colțea A.',
                 'Brîcov':'Brîcov A.',
                 'Yamamoto':'Yamamoto M.',
                 'Crăciun':'Crăciun D.'}
# Cell values that name_ok rejects on an exact match. Presumably placeholders,
# club/team labels and table headers that leak into the name columns - verify
# against the raw sheets before pruning.
redflags_names=['-','—','—',np.nan,'. ()','— ','- -.','- -. (-)',
                'Kashi','Sankon','București','Victorii:','Sakura','Taiken','Ikada','Sonkei','CRK','Museido',
                'Ichimon','Bushi Tokukai 1','Competitori – Shiai-sha','Echipa - roşu','Numele şi prenumele',
                'Victorii:','Victorii: 0','Victorii: 1','Victorii: 2','Victorii: 3','Victorii: 4',
                'Victorii: 5','?','Kyobukan','2/5','2/6','3/8','Finala','Kyobukan (0/0/0)','―',
                '(clasament final după meci de baraj)','CRK (Bucuresti)','Kaybukan','Isshin (Cluj)',
                'Ikada (Bucureşti)','Kyobukan (Braşov)','Puncte:','KASHI','Budoshin','Isshin',
                '— (—)','4. B.','4. Baraj: Stupu M - Hostina','4. Baraj: Moise KM - Korenschi M',
               'Bushi Tokukai (2/8/17)','CRK 2 (1/6/14)', 'CRK 2','CRK 1','Loc I.:',
               'Bushi Tokukai 2 (M Ciuc)','Echipa suport']
# Substrings that disqualify a name cell in name_ok: a cell containing any of
# these anywhere in its text is rejected.
redflags_names2=['Bushi Tokukai','Eliminatoriu','finala','Finala','Fianala','Ikada','Ichimon','Pool',
                'Locul ','Lotul ','Loc ','Grupa ','Isshin','Meciul ','Victorii:']
# Aliases between abbreviated names: name_cleaner looks up the abbreviated
# 'Surname I.' form here and, when present, returns the mapped spelling instead.
name_equals={'Chirea M.':'Chirea A.',
            'Ghinet C.':'Ghineț C.',
            'Domnița M.':'Domniță M.',
            'Garbea I.':'Gârbea I.',
            'Ah-hu W.':'Ah-hu S.',
            'Horvát M.':'Horváth M.',
            'Ionita A.':'Ioniță A.',
            'Medvedschi I.':'Medvețchi I.',
            'Mahika S.':'Mahika-Voiconi S.',
            'Mate L.':'Máté L.',
            'Hentea L.':'Hentea A.',
            'Stupu I.':'Stupu A.',
            'Ah-Hu S.':'Ah-hu S.',
            'Alexa I.':'Alexa A.',
            'Angelescu M.':'Angelescu M.',
            'Apostu D.':'Apostu T.',
            'Brâcov A.':'Brîcov A.',
            'Catoriu D.':'Cantoriu D.',
            'Călina A.':'Călina C.',
            'Buzás C.':'Búzás C.',
            'Korenshi E.':'Korenschi E.',
            'Pleșa R.':'Pleșea R.',
            'Galos A.':'Galoș A.',
            'Győrfi G.':'Györfi G.',
            'Győrfi S.':'Györfi S.',
            'Ghineț G.':'Ghineț C.',
            'Hostina E.':'Hoștină E.', 
            'Hostină E.':'Hoștină E.', 
            'Ianăs F.':'Ianăș F.',
            'Ianas F.':'Ianăș F.',
            'Lacatus M.':'Lăcătuș M.',
            'Máthé L.':'Máté L.',
            'Burinaru A.':'Burinaru Al.',
            'Nastase M.':'Năstase E.',
            'Oprisan A.':'Oprișan A.',
            'Pârlea A.':'Pîrlea A.',
            'Sabau D.':'Sabău D.',
            'Spriu C.':'Spiru C.',
            'Bíró S.':'Biró S.',
            'Stănculascu C.':'Stănculescu C.',
            'Vrânceanu M.': 'Vrânceanu L.',
            'Wasicek V.':'Wasicheck W.',
            'Wasicsec W.':'Wasicheck W.',
            'Wasicsek W.':'Wasicheck W.',
            'Zolfoghari A.':'Zolfaghari A.'}
# Exact input names that name_cleaner returns a fixed abbreviation for, before any
# other processing. The values use longer given-name prefixes ('Cr.', 'Că.', ...)
# so that people sharing a surname and first initial stay distinct.
name_doubles={
        'Cristea Cristina':'Cristea Cr.', 
        'Cristea Călin-Ștefan':'Cristea Că.',
        'Sandu Marius-Cristian':'Sandu Mar.', 
        'Sandu Matei-Serban':'Sandu Mat.',
        'Georgescu Andrei':'Georgescu An.', 
        'Georgescu Alexandra':'Georgescu Al.',
        'Péter Csongor':'Péter Cso.', 
        'Péter Csanád':'Péter Csa.',
        'Luca Mihnea':'Luca Mihn.', 
        'Luca Mihai-Cătălin':'Luca Miha.',
        'Luca':'Luca Miha.',
        'Luca M':'Luca Miha.',
        'Luca M.':'Luca Miha.',
        'Luca Mihai':'Luca Miha.',
        'Luca Traian-Dan':'Luca Tr.', 
        'Luca Tudor':'Luca Tu.',
        'Canceu Anamaria':'Canceu An.', 
        'Canceu Adriana-Maria':'Canceu Ad.',
        'Cioată Daniel-Mihai':'Cioată M.', 
        'Cioată Dragoș':'Cioată D.',
        'Burinaru Alexandra':'Burinaru Al.', 
        'Burinaru Andreea':'Burinaru An.',
        'Dudaș Francisc Andrei':'Dudaș F.', 
        'Dudaș Francisc':'Dudaș F.'}

# Cedilla letters are folded into their comma-below forms before any lookup.
# The upper-case 'Ţ' was missing from the original table and is now included.
letter_norm={'ţ':'ț','Ţ':'Ț','ş':'ș','Ş':'Ș'}
def name_cleaner(name):
    """Reduce a raw name to its canonical abbreviated key ('Surname I.').

    Steps, in order:
      1. an exact hit in ``name_doubles`` is returned as-is;
      2. letters are normalised through ``letter_norm``;
      3. a hit in ``name_exceptions`` replaces the raw name;
      4. everything from the first '(' on is dropped, and the first token plus
         the first character of the second token form the abbreviation;
      5. the abbreviation is mapped through ``name_equals`` when listed there.

    Tokens are split on any run of whitespace, so repeated spaces no longer
    produce an empty second token.

    Parameters
    ----------
    name : str
        Raw name as found in a spreadsheet cell or in the roster.

    Returns
    -------
    str
        The abbreviated name.

    Raises
    ------
    ValueError
        If fewer than two tokens remain before the first '(' (the original
        code failed here with a bare IndexError). Add the name to
        ``name_exceptions`` or ``name_doubles`` to resolve it.
    """
    if name in name_doubles:
        return name_doubles[name]
    for letter in letter_norm:
        name=name.replace(letter,letter_norm[letter])
    # `name` itself is kept untouched from here on because it is echoed below.
    source=name_exceptions[name] if name in name_exceptions else name
    rnames=source.split('(')[0].split()
    if len(rnames)<2:
        raise ValueError('cannot abbreviate name %r: expected a surname and a given name'%(name,))
    sname=rnames[0]+' '+rnames[1][0]+'.'
    if sname in name_equals:
        sname=name_equals[sname]
    if sname in name_doubles:
        # Dev aid: the abbreviation collides with an ambiguous name_doubles key.
        print(name,sname)
    return sname

In [ ]:
def name_ok(name):
    """Tell whether a spreadsheet cell holds a usable person name.

    A cell is rejected when it is not a string (this covers NaN, None and
    numeric cells), when it is the literal text 'nan', when it equals an entry
    of ``redflags_names``, or when it contains any substring listed in
    ``redflags_names2``.

    The original ``name==np.nan`` test could never be true (NaN is not equal
    to itself), and non-string cells other than NaN raised TypeError in the
    substring test; both are now handled by the type check.

    Parameters
    ----------
    name : object
        Raw cell value.

    Returns
    -------
    bool
    """
    if not isinstance(name,str):
        return False
    if name=='nan':
        return False
    if name in redflags_names:
        return False
    return not any(flag in name for flag in redflags_names2)

Standardize names


In [ ]:
# Index every acceptable competitor name found in the match records.
all_players={}              # abbreviated name -> {year: {'names': raw spellings seen}}
all_players_r={}            # raw spelling -> abbreviated name (first mapping wins)
all_players_unsorted=set()  # every raw name cell, accepted or not
for year,competitions in matches.items():
    for records in competitions.values():
        for match in records:
            for color in ('aka','shiro'):
                rname=match[color]['name']
                all_players_unsorted.add(rname)
                if not name_ok(rname):
                    continue
                name=name_cleaner(rname)
                all_players_r.setdefault(rname,name)
                seasons=all_players.setdefault(name,{})
                seasons.setdefault(year,{'names':set()})['names'].add(rname)

In [ ]:
# Group the roster's full names under their abbreviated key.
name_linker={}
for name in members['name']:
    try:
        cname=name_cleaner(name)
    except (IndexError,AttributeError,TypeError,ValueError) as err:
        # The original bare `except` only printed the name and then reused the
        # abbreviation left over from the previous row (or hit a NameError on the
        # first row), silently filing this person under someone else. Fall back
        # to the raw name as its own key so later lookups by roster name still work.
        print(name,err)
        cname=name
    name_linker.setdefault(cname,set()).add(name)

In [ ]:
# Invert name_linker: full roster name -> abbreviated key.
names_abbr={}
for abbr,full_names in name_linker.items():
    if len(full_names)>1:
        # Dev aid only: lists abbreviations shared by several roster names so
        # that disambiguating exceptions can be added.
        print(abbr,full_names)
    names_abbr.update(dict.fromkeys(full_names,abbr))

In [ ]:
# Per-row abbreviation list plus two lookups keyed by abbreviation.
names_abbr_list=[]   # abbreviation of every roster row, in row order
name_abbr2long={}    # abbreviation -> full name (the last roster row wins)
name_abbr2club={}    # abbreviation -> {year: club}
for i in members.index:
    row=members.loc[i]
    abbr=names_abbr[row['name']]
    names_abbr_list.append(abbr)
    name_abbr2long[abbr]=row['name']
    name_abbr2club.setdefault(abbr,{})[row['year']]=row['club']

In [ ]:
# Attach the abbreviated name to every roster row (names_abbr_list is in row order).
members['name_abbr']=names_abbr_list

In [ ]:
# Collect referees (shinpan) from matches whose two competitors are both valid.
all_shinpan={}              # abbreviated referee name -> list of match records
all_shinpan_r={}            # raw referee spelling -> abbreviated name (first wins)
all_shinpan_unsorted=set()  # every raw referee cell seen on a valid match
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            if 'shinpan' not in match:
                continue
            aka=match['aka']['name']
            shiro=match['shiro']['name']
            # The competitor check does not depend on the referee slot, so it is
            # evaluated once per match instead of once per slot (the original ran
            # it three times, tripling the name_cleaner calls and their prints).
            if not (name_ok(aka) and name_ok(shiro) and
                    name_cleaner(aka) in all_players and
                    name_cleaner(shiro) in all_players):
                continue
            for color in ['fukushin1','shushin','fukushin2']:
                # Tolerate records that lack one of the three slots, as the
                # master-match loop further down already does.
                if color not in match['shinpan']:
                    continue
                rname=match['shinpan'][color]
                all_shinpan_unsorted.add(rname)
                if name_ok(rname):
                    name=name_cleaner(rname)
                    all_shinpan.setdefault(name,[]).append(match)
                    all_shinpan_r.setdefault(rname,name)

In [ ]:
# Full names for referees whose abbreviated key is not already in name_abbr2long.
# Several entries map the abbreviation to itself.
name_abbr2long_extends={
    'Ishikubo S.':'Ishikubo Shinichi',
    'Yamamoto M.':'Yamamoto M.',
    'Wolfs J.':'Wolfs Jan Claude',
    'Tsushima K.':'Tsushima Kanji',
    'Ducarme A.':'Ducarme Alain',
    'Tráser T.':'Tráser Tamás',
    'Borota B.':'Borota B.',
    'Arabadjiyski A.':'Arabadjiyski Alexandar',
    'Csala T.':'Csala Tibor',
    'Sagaev L.':'Sagaev Lubomir',
    'Hatvani L.':'Hatvani Lóránt',
    'Dusan N.':'Dusan N',
    'Borota V.':'Borota Vladimir',
    'Mandia F.':'Mandia Fabrizio',
    'Stanev A.':'Stanev A.',
    'Mochalov O.':'Mochalov O.',
    'Sozzi A.':'Sozzi A.',
    'Dimitrov M.':'Dimitrov M.'
}    
# Every referee missing from name_abbr2long must be listed above; an unlisted
# one raises KeyError here.
for i in all_shinpan.keys():
    if i not in name_abbr2long:
        name_abbr2long[i]=name_abbr2long_extends[i]

Infer clubs


In [ ]:
#naive infer
redflags_clubs=['','N/A','RO1','RO2']
club_equals={'MLD':'MOL/Md',
             'MOL':'MOL/Md',
             'IKD':'IKA',
             'HUN':'HUN/Hu',
             'BUL':'BUL/Bg',
             'TUR':'TUR/Tr',
             'MAC':'MAC/Mc',
             'MNE':'MNE/Mn',
             'SRB':'SRB/Sr',
             'ITA':'ITA/It',
             'ISS':'ISH',
             'Musso, Bg':'MUS/Bg',
             'Makoto, Sr':'MAK/Sr',
             'Szeged, Hu':'SZE/Hu'}
for name in all_players:
    for year in all_players[name]:
        for name_form in all_players[name][year]['names']:
            if '(' in name_form:
                club=name_form.split('(')[1].strip()[:-1]
                if club in club_equals: club=club_equals[club]
                if club not in redflags_clubs:
                    all_players[name][year]['club']=club

In [ ]:
# Fill club gaps from the same player's other competition years: the closest
# earlier year that has a club wins, otherwise the closest later one.
for name,seasons in all_players.items():
    for year,season in seasons.items():
        if 'club' in season:
            continue
        years=np.sort(list(seasons.keys()))
        if len(years)<2:
            # a single competition year leaves nothing to borrow from
            continue
        # walk backwards from the year before down to the first known year
        for y in range(year-1,years[0]-1,-1):
            if 'club' in seasons.get(y,{}):
                season['club']=seasons[y]['club']
                break
        if 'club' not in season:
            # nothing earlier: walk forwards up to the last known year
            for y in range(year+1,years[-1]+1):
                if 'club' in seasons.get(y,{}):
                    season['club']=seasons[y]['club']
                    break

In [ ]:
# Give every competitor without a roster entry a display name taken from the
# longest raw spelling seen in the match sheets.
for name in all_players:
    if name not in name_abbr2long:
        #infer using longest available name
        # keyed by length, so among equally long spellings the last one iterated wins
        names={len(j):j  for i in all_players[name] for j in all_players[name][i]['names']}
        if len(names)>0:
            inferred_name=names[max(names.keys())]
            if '(' in inferred_name:
                # Keep the text before the first '(' and trim it. The original
                # slice `[:find('(')-1]` assumed exactly one space before the
                # parenthesis and otherwise cut a letter or left a stray blank.
                inferred_name=inferred_name.split('(')[0].strip()
            print(name,inferred_name)
            name_abbr2long[name]=inferred_name
        else:
            print(name,all_players[name])

In [ ]:
# Second club pass: for player-years still without a club, consult the roster
# lookup name_abbr2club first; only players absent from the roster fall back to
# their own other competition years.
for name in all_players:
    years=np.sort(list(all_players[name].keys()))
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #get from list
            if name in name_abbr2club:
                minyear=min(name_abbr2club[name].keys())
                if year in name_abbr2club[name]:
                    all_players[name][year]['club']=name_abbr2club[name][year]
                elif year<minyear:
                    # competed before the first roster year: use that first year's club
                    all_players[name][year]['club']=name_abbr2club[name][minyear]
                # NOTE(review): a roster-listed player whose year is missing from the
                # roster but is not earlier than minyear gets no club in this cell.
            elif len(years)>1:
                #get club from previous year
                # ascending scan: the last assignment comes from the closest earlier year
                for y in range(years[0],year):
                    if y in all_players[name]:
                        if 'club' in all_players[name][y]:
                            all_players[name][year]['club']=all_players[name][y]['club']
                #if still not found, get club from next year
                if 'club' not in all_players[name][year]:
                    #get club from next year
                    # descending scan: the last assignment comes from the closest later year
                    for y in np.arange(years[-1],year,-1):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']

Interpolate missing years for members


In [ ]:
# Hand-maintained club per abbreviated name, applied in the next cell to
# player-years that are still without a club after the inference passes.
clubs_manual={
            'Balázs-Kercsó Z.':'BTK',
            'Nagy V.':'ISH',
            'Goró L.':'BTK',
            'Ghineț G.':'YUK',
            'Cioată E.':'KAS',
            'Leat M.':'IKA',
            'Perianu S.':'KNS',
            'Ah-hu S.':'ICH',
            'Preda A.':'CRK',
            'Salló Z.':'BTK',
            'András Z.':'BTK',
            'Neagu F.':'IKA',
            'Bódi Z.':'KYO',
            'Bumbu D.':'ISH',
            'Botean A.':'ISH',
            'Moldoveanu M.':'ISH',
            'Jeszenszki T.':'BTK',
            'Suru N.':'SAM',
            'Balázs S.':'BTK',
            'Perdi L.':'ISH',
            'Oprișan A.':'IKA',
            'Horváth D.':'BTK',
            'Sandache I.':'BTK',
            'Moise T.':'KAY',
            'Angelescu M.':'SAM',
            'Bărbulescu E.':'MUS',
            'Canceu A.':'KAS',
            'Crișan E.':'ISH',
            'Duicu T.':'KAS',
            'Dumbravă L.':'ISH',
            'Iordan R.':'IKA',
            'Jianu A.':'MUS',
            'Keresztes M.':'BTK',
            'Macavei I.':'KYO',
            'Mitelea C.':'ICH',
            'Pavel A.':'IKA',
            'Pienaru S.':'ISH',
            'Szikszai M.':'BTK',
            'Tamang S.':'SAM',
            'Tiron L.':'KNS',
            'Turdean S.':'KAS',
            'Wasicheck W.':'ISH',
            'Ștefan C.':'IKA'
}

In [ ]:
# Last resort for player-years still without a club: the manual table, else the
# placeholder 'XXX' (those names are collected for review, once per year hit).
club_errors=[]
for name,seasons in all_players.items():
    for season in seasons.values():
        if 'club' in season:
            continue
        season['club']=clubs_manual.get(name,'XXX')
        if name not in clubs_manual:
            club_errors.append(name)

In [ ]:
# Distinct names that ended up with the placeholder club 'XXX'.
set(club_errors)

In [ ]:
# Invert the player index: club -> {year: set of abbreviated player names}.
clubs={}
for name,seasons in all_players.items():
    for year,season in seasons.items():
        clubs.setdefault(season['club'],{}).setdefault(year,set()).add(name)

In [ ]:
def outcome_cleaner(outcome):
    """Return True when the outcome cell equals 'E', False for anything else.

    The caller stores the result as the match's 'encho' flag.
    """
    return bool(outcome=='E')

In [ ]:
def outcome_from_points(aka,shiro):
    """Derive the winner code and the point difference from two point counts.

    Returns ('X', 0) for equal counts, otherwise ('A', diff) when aka has more
    points or ('S', diff) when shiro has more. Note that diff is a string for
    a decided match while the tie case returns the integer 0.
    """
    if aka==shiro:
        return 'X',0
    winner,margin=('A',aka-shiro) if aka>shiro else ('S',shiro-aka)
    return winner,str(margin)

In [ ]:
# A points string containing any of these is a header cell, not a score.
redflags_points=['Puncte']
def point_clean1(point):
    """Normalise a single point mark.

    '○' and '1' become 'O', 'I' becomes 'H', and the filler marks
    '×', '–', '—' and '?' are removed.
    """
    return point.replace('○','O').replace('I','H').replace('×','')\
            .replace('–','').replace('1','O').replace('—','').replace('?','')
def points_cleaner(points):
    """Split one competitor's concatenated score cells into two point marks.

    Penalty markers ('∆', '▲', '(Ht)') are removed and reported through the
    hansoku flag; '(victorie)' is expanded to 'OO'; surrounding whitespace is
    trimmed.

    Parameters
    ----------
    points : str
        Concatenation of the competitor's hansoku and point cells.

    Returns
    -------
    tuple
        (point1, point2, count, hansoku): the two normalised marks ('' when
        absent), the length of the trimmed string before mark normalisation,
        and 1 if any penalty marker was present, else 0.

    Raises
    ------
    ValueError
        If more than two marks remain. The original printed a message and then
        failed with UnboundLocalError because point1/point2 were never set.
    """
    hansoku=0
    for marker in ('∆','▲','(Ht)'):
        if marker in points:
            hansoku=1
            points=points.replace(marker,'')
    if '(victorie)' in points:
        points=points.replace('(victorie)','OO')
    points=points.strip()
    if len(points)>2:
        raise ValueError('more than two point marks in %r'%(points,))
    # slicing yields '' for a missing mark, which covers the 0- and 1-mark cases
    point1=point_clean1(points[:1])
    point2=point_clean1(points[1:2])
    return point1,point2,len(points),hansoku

In [ ]:
def club_cleaner(club):
    """Split a club code into (club, country).

    'XXX/Yy' gives ('XXX', 'YY'): the second '/'-separated piece, upper-cased,
    is the country. A code without '/' is returned unchanged with country 'RO'.
    """
    parts=club.split('/')
    if len(parts)==1:
        return club,'RO'
    return parts[0],parts[1].upper()

In [ ]:
# Display names keyed by club code (Romanian clubs) or by country code (foreign
# competitors). The 'SR' key was listed twice in the original literal; the
# redundant second entry is removed and the resulting mapping is unchanged.
# NOTE(review): 'Ishhin' differs from the 'Isshin' spelling used in the red-flag
# name lists, and 'Coroan de Oțel' may be missing a letter - confirm before
# changing, since these strings end up in the exported data.
pretty_clubs={
    'ARA':'Arashi',
    'BSD':'Bushido',
    'BTK':'Bushi Tokukai',
    'BG':'Bulgaria',
    'CDO':'Coroan de Oțel',
    'CRK':'Clubul Român de Kendo',
    'HAR':'Hargita',
    'ICH':'Ichimon',
    'IKA':'Ikada',
    'ISH':'Ishhin',
    'IT':'Italy',
    'HU':'Hungary',
    'KAS':'Kashi',
    'KNS':'Kenshin',
    'KYO':'Kyobukan',
    'MC':'Macedonia',
    'SR':'Serbia',
    'MN':'Montenegro',
    'MD':'Moldova',
    'MUS':'Museido',
    'RON':'Ronin-do',
    'SAK':'Sakura',
    'SAM':'Sam-sho',
    'SAN':'Sankon',
    'SBK':'Sobukan',
    'SON':'Sonkei',
    'TAI':'Taiken',
    'TR':'Turkey',
    'XXX':'Unknown',
    'YUK':'Yu-kai',
    'KAY':'Kaybukan',
}
def pretty_club(club, country):
    """Return the display name for a competitor's affiliation.

    Romanian competitors (country 'RO') are labelled by club code, everyone
    else by country code. Raises KeyError for a code missing from pretty_clubs.
    """
    return pretty_clubs[club if country=='RO' else country]

In [ ]:
# Build roster-like rows for competitors who never appear in the roster: one
# row per year between their first and last competition year.
unregistered_members=[]
# Built once; the original rebuilt this set for every competitor.
registered_abbrs=set(members['name_abbr'].values)
for name in all_players:
    if name not in registered_abbrs:
        years=np.sort(list(all_players[name].keys()))
        for year in range(min(years),max(years)+1):
            # years without a competition entry borrow the club of the last year
            if year in all_players[name]:
                iyear=year
            else:
                iyear=max(years)
            club,country=club_cleaner(all_players[name][iyear]['club'])
            if country=='RO':
                activ='Inactiv'
                dan=0
            else:
                activ=''
                dan=''
            unregistered_members.append({'name':name_abbr2long[name],
                    'club':club,'active':activ,'year':year,'dan':dan,'country':country,
                    'pretty_club':pretty_club(club,country)})

In [ ]:
# Every roster row is tagged with country 'RO'.
members['country']='RO'

In [ ]:
# Roster rows followed by the rows synthesised for unregistered competitors.
members2=pd.concat([members,pd.DataFrame(unregistered_members)])

Appears in competition but Mu DAN


In [ ]:
# For every competitor who fought in a year for which members2 has no row for
# them, clone their earliest members2 row(s) and stamp the clone with that year.
members_mu_dan_extensions=[]
members_by_name=members2.set_index(['name'])
for year in matches:
    # A boolean mask replaces `set_index(['year']).loc[year]`, which raised
    # KeyError for a year without rows and collapsed to a Series (breaking the
    # ['name'] lookup) when the year had exactly one row.
    listed_names=set(members2.loc[members2['year']==year,'name'])
    # Names already extended for this year. The original appended one identical
    # frame per appearance, and only the later drop_duplicates() removed them.
    extended=set()
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if (name_ok(aka)) and\
               (name_ok(shiro)) and\
               (name_cleaner(aka) in all_players) and\
               (name_cleaner(shiro) in all_players):
                for a in ['aka','shiro']:
                    name=k[a]['name']
                    rname=name_abbr2long[all_players_r[name]]
                    if rname in listed_names or rname in extended:
                        continue
                    extended.add(rname)
                    dummy=members_by_name.loc[[rname]]
                    dummy=dummy[dummy['year']==min(dummy['year'])]
                    dummy=dummy.reset_index()
                    dummy['year']=year
                    members_mu_dan_extensions.append(dummy)

In [ ]:
# Append the per-year extension rows to the combined member table.
members3=pd.concat([members2,pd.concat(members_mu_dan_extensions)])

In [ ]:
# Drop the 'Unnamed: 0' column and remove exact duplicate rows.
members3=members3.drop('Unnamed: 0',axis=1).drop_duplicates()

In [ ]:
# Persist the final member table.
members3.to_csv('data/members.csv')

In [ ]:
# Flatten every valid match into one record holding both competitors' identity,
# club, dan and points, the referees, and the derived outcome.
master_matches=[]
for year in matches:
    # A boolean mask replaces `set_index(['year']).loc[year]`, which raised
    # KeyError for a year without rows and returned a Series for a single row.
    members_by_year=members3[members3['year']==year].drop_duplicates()
    # Name-indexed view built once per year; the original rebuilt it for every
    # competitor of every match.
    members_by_year_name=members_by_year.set_index(['name'])
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            good=True
            match={'year':year,'competition':competition}
            match['match_category'],match['match_teams'],match['match_phase']=match_cleaner(year,k['match_type'])
            if 'shinpan' in k:
                for color in ['fukushin1','shushin','fukushin2']:
                    if color in k['shinpan']:
                        if k['shinpan'][color] in all_shinpan_r:
                            match[color]=name_abbr2long[all_shinpan_r[k['shinpan'][color]]]
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if (name_ok(aka)) and\
               (name_ok(shiro)) and\
               (name_cleaner(aka) in all_players) and\
               (name_cleaner(shiro) in all_players):
                for a in ['aka','shiro']:
                    points=''
                    # every key except 'name' is a score cell; they are joined in
                    # the dict's iteration order
                    for h in k[a]:
                        if h=='name':
                            name=k[a][h]
                            match[a+' name']=name_abbr2long[all_players_r[name]]
                            club, country=club_cleaner(all_players[all_players_r[name]][year]['club'])
                            match[a+' club'], match[a+' country']=club, country
                            match[a+' dan']=members_by_year_name.loc[match[a+' name']]['dan']
                            match[a+' pretty_club']=pretty_club(club, country)
                        else:
                            point=k[a][h]
                            if str(point)=='nan': point=''
                            points=points+point
                    for redflag in redflags_points:
                        if redflag in points:
                            good=False
                    if good:
                        match[a+' point1'],match[a+' point2'],match[a+' points'],match[a+' hansoku']=points_cleaner(points)
            else:
                good=False
            if good:
                if 'outcome' in k:
                    match['encho']=outcome_cleaner(k['outcome'])
                else:
                    match['encho']=False
                match['winner'],match['difference']=outcome_from_points(match['aka points'],match['shiro points'])

                master_matches.append(match)

In [ ]:
# One row per accepted match.
data=pd.DataFrame(master_matches)

Cleanup


In [ ]:
# Turn the numeric hansoku flag into a display mark: 0 -> '', 1 -> 'Δ'.
for side in ('aka','shiro'):
    column=side+' hansoku'
    data[column]=data[column].replace(0,'').replace(1,'Δ')

In [ ]:
# Persist the match table.
data.to_csv('data/matches.csv')

Group by player


In [ ]:
# Aka-side view of each match: drop the 'shiro ...' columns, strip the 'aka '
# prefix, and record the opponent. The explicit copy keeps the column
# assignments below from targeting a slice of `data` (SettingWithCopyWarning).
aka=data[[i for i in data.columns if 'shiro ' not in i]].copy()
aka.columns=[i.replace('aka ','') for i in aka.columns]
aka['color']='aka'
aka['opponent']=data['shiro name']

In [ ]:
# Shiro-side view of each match: drop the 'aka ...' columns, strip the 'shiro '
# prefix, and record the opponent. The explicit copy keeps the column
# assignments below from targeting a slice of `data` (SettingWithCopyWarning).
shiro=data[[i for i in data.columns if 'aka ' not in i]].copy()
shiro.columns=[i.replace('shiro ','') for i in shiro.columns]
shiro['color']='shiro'
shiro['opponent']=data['aka name']

In [ ]:
# Stack both per-side views: two rows per match, one for each competitor.
extended_matches=pd.concat([aka,shiro],axis=0).reset_index(drop=True)

In [ ]:
# Preview the per-competitor table.
extended_matches.head()

In [ ]:
# Persist the per-competitor table.
extended_matches.to_csv('data/extended_matches.csv')

In [ ]:
# One frame per point slot, each exposing its mark as 'point'. rename() returns
# a new frame, which avoids the in-place rename on a column slice that the
# original performed (SettingWithCopyWarning).
p1=extended_matches[[i for i in extended_matches.columns if i!='point2']].rename(columns={'point1':'point'})
p2=extended_matches[[i for i in extended_matches.columns if i!='point1']].rename(columns={'point2':'point'})

In [ ]:
# Stack the two point-slot frames: one row per competitor per point slot.
extended_points=pd.concat([p1,p2],axis=0).reset_index(drop=True)

In [ ]:
# Persist the per-point table.
extended_points.to_csv('data/extended_points.csv')

In [ ]:
# Inspect the column names of the per-point table.
extended_points.columns

In [ ]:
def _referee_view(role,excluded):
    """Copy of extended_points with `role` renamed to 'shinpan'.

    Columns whose name contains any substring in `excluded` are left out.
    """
    kept=[c for c in extended_points.columns if not any(x in c for x in excluded)]
    view=extended_points[kept]
    view.columns=[c.replace(role,'shinpan') for c in view.columns]
    return view
# One view per referee slot, then stacked: one row per point per referee.
shu=_referee_view('shushin',['fukushin'])
fk1=_referee_view('fukushin1',['shushin','fukushin2'])
fk2=_referee_view('fukushin2',['shushin','fukushin1'])
extended_shinpan=pd.concat([shu,fk1,fk2],axis=0).reset_index(drop=True)

In [ ]:
# Persist the per-referee table.
extended_shinpan.to_csv('data/extended_shinpan.csv')

In [ ]:
# Inspect the column names of the per-referee table.
extended_shinpan.columns

In [ ]:
# Distinct club codes present in the per-referee table.
extended_shinpan['club'].unique()

Competitor statistics


In [ ]:
# Per-competitor tallies: 'U' counts appearances, 'club' is the club of the
# first appearance, and every distinct point mark gets its own counter.
competitors={}
# iterrows() replaces `data.T.iteritems()`: DataFrame.iteritems was removed in
# pandas 2.0, and iterating the rows directly also avoids transposing the frame.
for _,row in data.iterrows():
    for a in ['aka ','shiro ']:
        name=row[a+'name']
        club=row[a+'club']
        if name not in competitors:
            competitors[name]={'U':0,'club':club}
        for j in ['point1','point2']:
            point=row[a+j]
            if point!='':
                if point not in competitors[name]:competitors[name][point]=0
                competitors[name][point]+=1
        competitors[name]['U']+=1

In [ ]:
# One column per competitor; rows are the tally keys ('U', 'club', point marks).
data2=pd.DataFrame(competitors)

In [ ]:
# Transpose so each competitor is a row, then persist.
data2.T.to_csv('data/competitors.csv')

In [ ]:


In [ ]:


In [ ]: